/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2005 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

/* Version-control identification string for this source file
   ("@(#)" marker followed by the expanded "$Id$" keyword). */
static const char __idstring[] = "@(#)$Id: mx_kraw.c,v 1.33.2.1 2006/11/10 07:13:07 loic Exp $";

#include "mx_arch.h"
#include "mx_misc.h"
#include "mx_instance.h"
#include "mx_malloc.h"
#include "mx_peer.h"
#include "mx_pio.h"
#include "mx_kraw.h"

/*
 * Handle a raw receive completion reported by the interrupt path.
 *
 * Takes an entry off the free-event list, fills it in as an
 * MX_KRAW_RECV_COMPLETE event (data0 is stored as the received byte
 * count, data1 as the incoming port), appends it to the pending list
 * and wakes the consumer if it asked to be woken.
 *
 * If the free-event list is empty the completion is dropped with a
 * warning rather than dereferencing a NULL entry.
 */
void
mx_kraw_rx_intr(mx_instance_state_t *is, 
		 uint32_t data0, uint32_t data1)
{
  struct raw_eventq_entry *entry;
  int do_wakeup = 0;

  /* create an event for the user application to consume */
  mx_spin_lock(&is->raw.spinlock);
  entry = STAILQ_FIRST(&is->raw.free_events);
  if (entry == NULL) {
    /* nothing to record the event in: STAILQ_REMOVE_HEAD on an empty
       list would dereference NULL, so give up on this completion */
    mx_spin_unlock(&is->raw.spinlock);
    MX_WARN(("mx%d: no free raw event for recv completion, dropping it\n",
	     is->id));
    return;
  }
  STAILQ_REMOVE_HEAD(&is->raw.free_events, entries);
  entry->event.status =  MX_KRAW_RECV_COMPLETE;
  entry->event.recv_bytes = data0;
  entry->event.incoming_port = data1;

  /* queue it */
  STAILQ_INSERT_TAIL(&is->raw.pending_events, entry, entries);
  if (is->raw.wakeup_needed) {
    do_wakeup = 1;
    is->raw.wakeup_needed = 0;
  }
  mx_spin_unlock(&is->raw.spinlock);

  /* wake up anybody waiting for it (done outside the lock) */
  if (do_wakeup)
    mx_wake(&is->raw.sync);
}


/*
 * Handle a raw send completion reported by the interrupt path.
 *
 * Takes an entry off the free-event list, fills it in as an
 * MX_KRAW_SEND_COMPLETE event whose context is rebuilt from data0
 * (low part) and data1 (high part), appends it to the pending list
 * and wakes the consumer if it asked to be woken.
 *
 * If the free-event list is empty the completion is dropped with a
 * warning rather than dereferencing a NULL entry.
 */
void
mx_kraw_tx_intr(mx_instance_state_t *is, 
		 uint32_t data0, uint32_t data1)
{
  struct raw_eventq_entry *entry;
  int do_wakeup = 0;

  /* create an event for the user application to consume */
  mx_spin_lock(&is->raw.spinlock);
  entry = STAILQ_FIRST(&is->raw.free_events);
  if (entry == NULL) {
    /* nothing to record the event in: STAILQ_REMOVE_HEAD on an empty
       list would dereference NULL, so give up on this completion */
    mx_spin_unlock(&is->raw.spinlock);
    MX_WARN(("mx%d: no free raw event for send completion, dropping it\n",
	     is->id));
    return;
  }
  STAILQ_REMOVE_HEAD(&is->raw.free_events, entries);
  entry->event.status =  MX_KRAW_SEND_COMPLETE;
  entry->event.context = data0;
  entry->event.context |= MX_U32_TO_HIGHPART(data1, entry->event.context);

  /* queue it */
  STAILQ_INSERT_TAIL(&is->raw.pending_events, entry, entries);
  if (is->raw.wakeup_needed) {
    do_wakeup = 1;
    is->raw.wakeup_needed = 0;
  }
  mx_spin_unlock(&is->raw.spinlock);

  /* wake up anybody waiting for it (done outside the lock) */
  if (do_wakeup)
    mx_wake(&is->raw.sync);
}

/*
 * Tick hook: if a consumer has flagged that it wants a wakeup, clear
 * the flag and wake it, even though no event has been queued.
 */
void
mx_kraw_tick(mx_instance_state_t *is) 
{
  int waiter_present;

  /* sample and clear the request under the lock */
  mx_spin_lock(&is->raw.spinlock);
  waiter_present = is->raw.wakeup_needed ? 1 : 0;
  if (waiter_present)
    is->raw.wakeup_needed = 0;
  mx_spin_unlock(&is->raw.spinlock);

  /* the wake itself happens after the lock has been released */
  if (waiter_present)
    mx_wake(&is->raw.sync);
}

/*
 * Fetch the next pending raw event for the caller.
 *
 * Inputs read from *e before it is overwritten: e->recv_buffer
 * (destination for received data), e->recv_bytes (maximum number of
 * bytes to copy out) and e->timeout (passed to mx_sleep(); 0 means do
 * not wait).
 *
 * On return *e holds a copy of the dequeued event, or e->status is
 * MX_KRAW_NO_EVENT when nothing was pending, or is->saved_state.reason
 * when mx_is_dead() reports true after the sleep.
 *
 * Returns the result of mx_copyout() for a receive event, 0 otherwise.
 */
int
mx_kraw_next_event(mx_endpt_state_t *es,
		  mx_raw_next_event_t *e)
{
  struct raw_eventq_entry *entry;
  mx_instance_state_t *is = es->is;
  mx_uaddr_t recv_buffer;
  int status = 0;
  int sleep_status;
  uint32_t max_recv;
  unsigned long flags;

  flags = 0; /* useless initialization to pacify -Wunused on platforms
                where flags are not used */


  /* save the caller's inputs now: the bcopy() below replaces all of *e */
  e->status = MX_KRAW_NO_EVENT;
  recv_buffer = (mx_uaddr_t)e->recv_buffer;
  max_recv = e->recv_bytes;

  mx_spin_lock_irqsave(&is->raw.spinlock, flags);
  entry = STAILQ_FIRST(&is->raw.pending_events);

  /* see if there is an event pending before sleeping */
  if (entry == NULL) {
    if (e->timeout == 0)
      goto out_locked;
    /* tell the interrupt handler to wake us up when
       an event arrives */
    is->raw.wakeup_needed = 1;
    /* the lock is dropped for the duration of the sleep */
    mx_spin_unlock_irqrestore(&is->raw.spinlock, flags);
    sleep_status = mx_sleep(&is->raw.sync, e->timeout, MX_SLEEP_INTR);
    if (sleep_status != 0) {
      /* return if no event pending */
      goto out_with_nothing;
    }
    /* check to see if the mcp died, since the raw endpoint opener
       will want to know about it */
    if (mx_is_dead(is)) {
      e->status = is->saved_state.reason;
      goto out_with_nothing;
    }

    /* pick up the lock and look for an event.  There
       may not be an event pending due to a mapper tick */
    mx_spin_lock_irqsave(&is->raw.spinlock, flags);
    entry = STAILQ_FIRST(&is->raw.pending_events);
    if (entry == NULL)
      goto out_locked;
  }
  /* remove the event from the queue, and copy info
     to user buffer */

  STAILQ_REMOVE_HEAD(&is->raw.pending_events, entries);

  bcopy(&entry->event, e, sizeof (*e));
  /* free the event */
  STAILQ_INSERT_TAIL(&is->raw.free_events, entry, entries);

 out_locked:
  /* reached with the spinlock held, with or without a dequeued event */
  mx_spin_unlock_irqrestore(&is->raw.spinlock, flags);

  if (e->status == MX_KRAW_RECV_COMPLETE) {
    /* copy out raw buffer to userspace */
    /* rx_cnt masked with max_rx_idx selects the receive buffer; offset
       is computed from rx_cnt before it is incremented */
    int idx = is->raw.rx_cnt & is->raw.max_rx_idx;
    int offset = is->raw.rx_cnt * MX_RAW_BYTES;
    int copyout_len;
    char *buf = is->raw.rx_bufs[idx].buf;
    is->raw.rx_cnt++;   

    /* never copy more than the caller said its buffer can hold */
    if (e->recv_bytes > max_recv)
      copyout_len = max_recv;
    else
      copyout_len = e->recv_bytes;

    /*    printf("recv %d 0x%x @ 0x%x\n", e->recv_bytes, *(int *)buf,
	  ntohl(is->raw.rx_bufs[idx].dma.low));*/
    status = mx_copyout(buf, recv_buffer, copyout_len, es->is_kernel);

    /* Tell the mcp we've consumed the recv */
    /* (written even when mx_copyout() reported an error) */
    *(uint32_t *)(is->lanai.sram + is->raw.host_rx) = htonl(offset);  
  } else if (e->status == MX_KRAW_SEND_COMPLETE) {
    /* one more send slot is available again; mx_kraw_send() compares
       tx_req against tx_done */
    is->raw.tx_done++;
  }

 out_with_nothing:
  /* reached without the spinlock held */

  /*  printf("%s:%d: return %d (%d,%d)\n", __FUNCTION__, __LINE__, status,
      is->raw.tx_done, is->raw.rx_cnt);*/
  return status;
}

/*
 * Post a raw send.
 *
 * Copies the caller's route (s->route_pointer, s->route_length bytes)
 * and payload (s->data_pointer, s->buffer_length bytes) into the next
 * transmit buffer and writes a MX_MCP_KREQ_RAW request to the kernel
 * request queue.  On MX_BOARD_TYPE_Z boards the route is sent as the
 * leading part of the packet, and s->buffer_length is increased by the
 * route length.
 *
 * Returns 0 on success, EBUSY when too many sends are outstanding,
 * EINVAL when the route/payload do not fit in a raw buffer or the
 * port is out of range, or the mx_copyin() error.
 */
int
mx_kraw_send(mx_endpt_state_t *es, mx_raw_send_t *s)
{
  mx_instance_state_t *is = es->is;
  mcp_kreq_t kreq;    
  mcp_kreq_raw_t *req;
  int idx, status;
  uint32_t context_low, context_high;
  uint16_t route_length;
  mx_rawbuf_t *tx_buf;
  unsigned long flags;
  unsigned msg_offset;
  unsigned data_offset;
  const unsigned raw_bytes = MX_RAW_BYTES;

  flags = 0; /* useless initialization to pacify -Wunused on platforms
                where flags are not used */


  if ((is->raw.tx_req - is->raw.tx_done) > (is->raw.max_tx_idx - 2))
    return EBUSY;

  idx = is->raw.tx_req & is->raw.max_tx_idx;
  tx_buf = &is->raw.tx_bufs[idx];
  route_length = s->route_length;

  /* where the payload lands in the buffer: directly after the route
     on Z boards, at the rounded route offset otherwise */
  if (is->board_type == MX_BOARD_TYPE_Z)
    data_offset = route_length;
  else
    data_offset = MX_ROUND_ROUTE(route_length);

  /* Bound both the route and the payload.  All terms are compared
     separately so that an oversized route cannot make the
     subtraction wrap and defeat the check. */
  if (route_length > raw_bytes || data_offset > raw_bytes
      || s->buffer_length > raw_bytes - data_offset) {
    return EINVAL;
  }
  if (s->physical_port >= is->num_ports) {
    return EINVAL;
  }

  /* commit to using this buf */
  is->raw.tx_req++;

  /* copy in the user's route and raw send in the same buffer */
  /*  printf("%s:%d len = %d, addr = 0x%"PRIx64"\n", 
      __FUNCTION__, __LINE__, s->route_length, s->route_pointer); */

  status = 0;
  if (route_length != 0)
    status = mx_copyin((mx_uaddr_t)s->route_pointer, tx_buf->buf, route_length,
			 es->is_kernel);
  if (status == 0)
    status = mx_copyin((mx_uaddr_t)s->data_pointer, 
			(char *)tx_buf->buf + data_offset,
			s->buffer_length, 
			es->is_kernel);
  if (status != 0) {
    /* Nothing was posted, so no send-complete event will ever account
       for this slot: undo the reservation instead of leaking it.
       NOTE(review): like the reservation itself this is not locked;
       presumably raw sends are serialized by the caller -- confirm. */
    is->raw.tx_req--;
    return status;
  }

  if (is->board_type == MX_BOARD_TYPE_Z) {
    /* pretend there is no route (make it part of the packet) */
    s->buffer_length += route_length;
    route_length = 0;
    msg_offset = 0;
  } else {
    msg_offset = data_offset;
  }
  
  /*  printf("send %d, 0x%x @ 0x%x\n", s->buffer_length, 
	 *(int *)((char *)tx_buf->buf + MX_RAW_MAXROUTE),
	 ntohl(tx_buf->dma.low));*/
  context_low = MX_LOWPART_TO_U32(s->context);
  context_high = MX_HIGHPART_TO_U32(s->context);

  /* post it in the kernel request queue */
  bzero(&kreq, sizeof(kreq));
  req = &kreq.raw.req;
  req->addr_high = tx_buf->dma.high;   	/* pre-swapped */
  req->addr_low = tx_buf->dma.low;	/* pre-swapped */
  req->context_low = htonl(context_low);
  req->context_high = htonl(context_high);
  req->msg_offset = htons(msg_offset);
  req->msg_length = htons((uint16_t)s->buffer_length);
  req->route_length = htons((uint16_t)route_length);
  req->port = (uint8_t)s->physical_port;	/* 1 byte, no swap */
  req->type = MX_MCP_KREQ_RAW;			/* 1 byte, no swap */
  
  mx_spin_lock_irqsave(&is->kreqq_spinlock, flags);
  is->board_ops.write_kreq(is, &kreq);
  mx_spin_unlock_irqrestore(&is->kreqq_spinlock, flags);
  return 0;
}

/*
 * Begin a route update: forget all previously recorded routes.
 *
 * For every port the per-peer route offsets are zeroed and, for boards
 * whose id is below MX_PEER_FLAG_SEEN_NB_BOARDS, this board/port's
 * "seen" bit is cleared in every peer table entry.  Finally the
 * per-port valid route counters are reset.  Always returns 0.
 */
int
mx_kraw_set_route_begin(mx_endpt_state_t *es)
{
  mx_instance_state_t *is = es->is;
  int port;

  for (port = 0; port < is->num_ports; port++) {
    int peer;

    /* zero out the pointers to routes */
    bzero(is->routes[port].offsets, 
	  sizeof(is->routes[port].offsets[0]) * mx_max_nodes);

    /* boards beyond the flag range have no "seen" bits to clear */
    if (!(is->id < MX_PEER_FLAG_SEEN_NB_BOARDS))
      continue;

    for (peer = 0; peer <= mx_biggest_peer; peer++)
      mx_peer_table[peer].flags &= ~(MX_PEER_FLAG_SEEN_P0 << (is->id * 2 + port));
  }

  bzero(is->raw.valid_route_count, sizeof(is->raw.valid_route_count));
  return 0;
}


/*
 * Push one port's host route table to the mcp.
 *
 * The host table is copied to the mcp table with mx_pio_memcpy(), then
 * MX_MCP_CMD_UPDATE_ROUTES is issued repeatedly, each command covering
 * MX_VPAGE_SIZE / block_size peers, until mx_biggest_peer is covered.
 *
 * Returns 0 on success, ENXIO as soon as one command fails.
 */
static int
mx_kraw_set_route_end_helper(mx_instance_state_t *is, mx_routes_t *routes, int port)
{
  uint32_t unused_result;
  int first_peer;

  mx_pio_memcpy (routes->mcp_table, routes->host_table, 
	       (mx_biggest_peer + 1) * routes->block_size, MX_PIO_FLUSH | MX_PIO_32BYTE_FLUSH);

  for (first_peer = 0; first_peer <= mx_biggest_peer;
       first_peer += MX_VPAGE_SIZE / routes->block_size) {
    int rc;

    rc = mx_lanai_command(is, MX_MCP_CMD_UPDATE_ROUTES, port, 
			  first_peer, MX_VPAGE_SIZE / routes->block_size, 
			  &unused_result, &routes->cmd_sync);
    if (rc == 0)
      continue;

    MX_WARN(("Updating routes failed on board %d\n", is->id));
    return ENXIO;
  }
  return 0;
}

/*
 * Finish a route update: for every port that has a non-zero valid
 * route count, push its route table to the mcp, then call
 * mx_query_peer(is, 0).
 *
 * Returns 0 on success; the first helper failure is returned as is
 * and mx_query_peer() is then not called.
 */
int
mx_kraw_set_route_end(mx_endpt_state_t *es)
{
  mx_instance_state_t *is = es->is;
  int port;

  for (port = 0; port < is->num_ports; port++) {
    int rc;

    /* nothing recorded for this port, nothing to push */
    if (!is->raw.valid_route_count[port])
      continue;

    rc = mx_kraw_set_route_end_helper(is, &is->routes[port], port);
    if (rc)
      return rc;
  }

  mx_query_peer(is, 0);
  return 0;
}


/*
 * Sets a route for use by the mcp.
 * 
 * 1) All routes begin aligned on an 8 byte boundary.
 * 2) The length of the route is stored just before that boundary.
 * 3) The routes are repeated to fill up all the slots.
 *
 * r->source_port selects the local port whose table is updated,
 * r->mac_high16/r->mac_low32 identify the peer (it is added to the
 * peer table if unknown) and r->route_pointer/r->route_length give
 * the hops.  When clear is non-zero the peer's whole route block is
 * zeroed instead of adding a route.
 *
 * Returns 0 on success, EINVAL for a bad source port, ENOSPC when the
 * route is too long or the peer's block has no free slot, ENOENT when
 * the peer cannot be entered in the peer table, or the error reported
 * by mx_copyin() / mx_add_peer().
 */

int
mx_kraw_set_route(mx_endpt_state_t *es, mx_set_route_t *r, int clear)
{
  mx_instance_state_t *is = es->is;
  uint32_t mac_low32;
  uint16_t mac_high16;
  int hash_index, peer_index, status;
  signed char *block;
  mx_routes_t *routes;
  int offset, block_size;
  unsigned int source_port;
  unsigned int length;
  uint8_t hops[64];
  mx_peer_hash_t *bin;

  source_port = r->source_port;
  if (source_port >= is->num_ports)
    return EINVAL;

  /* the route table of the port the caller named */
  routes = &is->routes[source_port];

  MX_DEBUG_PRINT(MX_DEBUG_MAPPER, ("%s called\n", __FUNCTION__));

  length = r->route_length;

  block_size = routes->block_size;

  /* the route is staged in hops[], so it must fit there as well as
     within the per-route slot size */
  if (length > MX_RAW_MAXROUTE || length > sizeof(hops))
    return ENOSPC;

  status = mx_copyin((mx_uaddr_t)r->route_pointer, hops, length, es->is_kernel);
  if (status)
    return status;

  mac_low32 = r->mac_low32;
  mac_high16 = r->mac_high16;

  /* lookup mx mac address in hash or overflow table */
  bin = mx_peer_lookup(mac_high16, mac_low32);
  if (!bin) {
    hash_index = -1;
    status = mx_add_peer(&hash_index, mac_high16, mac_low32);
    if (status == ENOSPC) {
      MX_WARN(("mx%d: Peer table full, dropping node 0x%x%x\n",
	       is->id, mac_high16, mac_low32));
      return ENOENT;
    }
    if (status == 0) {
      /* only report the index once mx_add_peer() has produced it */
      MX_DEBUG_PRINT(MX_DEBUG_MAPPER, ("Adding address 0x%x%x to peer hash at index %d\n", 
				 mac_high16, mac_low32, hash_index));
    }
    bin = mx_peer_lookup(mac_high16, mac_low32);
    if (!bin) {
      /* the peer could not be added: do not dereference a NULL bin */
      return status ? status : ENOENT;
    }
  }
  peer_index = bin->index;

  if (is->id < MX_PEER_FLAG_SEEN_NB_BOARDS) {
    mx_peer_table[peer_index].flags |= 
      MX_PEER_FLAG_SEEN_P0 << (source_port + is->id * 2);
  }
  mx_update_peer_type(r->host_type, peer_index, 0);

  /* offset of the next free route slot within this peer's block */
  offset = routes->offsets[peer_index];
  
  /* find base of routing block */
  block = (signed char *) &routes->host_table[peer_index * block_size];

  if (clear) {
    bzero(block, block_size);
    routes->offsets[peer_index] = 0;
    is->raw.valid_route_count[source_port]--;

    return 0;
  }
  
  
  /* copy the route.  
     When writing routes, we start them on an 8 byte boundary,
     so they will be properly aligned for the firmware.  We
     put the length at the end */

  /* length was already checked against MX_RAW_MAXROUTE above, so only
     the remaining room in the block needs checking here */
  if (offset + MX_RAW_MAXROUTE + 1 > block_size) {
    /* no space available to store the route */
   MX_DEBUG_PRINT(MX_DEBUG_MAPPER,
		   ("mx%d: no space for rt to host %x%x (%d, %d, %d))\n", 
		    is->id, mac_high16, mac_low32, offset, length, block_size));
    return ENOSPC;
  }


  /* copy the route */
  bcopy(hops, &block[offset], length);

  /* mark directly connected nodes so that mx_info can
     properly display them */

  if (length == 0 && offset == 0)
    block[0] = 0xff;

  offset += MX_RAW_MAXROUTE + 1;
  
  /* update the length of this route.  we store the length
     at the end of the route just prior to the next one */

  block[offset - 1] = length;

  if (offset < block_size) {
    /* fill the remaining slots by repeating the sequence given so
       far */
    int o;
    for (o =0 ; o < block_size - offset; o += MX_RAW_MAXROUTE + 1) {
      mx_always_assert(offset + o + MX_RAW_MAXROUTE + 1 <= block_size);
      memcpy(block + offset + o, block + o, MX_RAW_MAXROUTE + 1);
    }
  }
  routes->offsets[peer_index] = offset;
  is->raw.valid_route_count[source_port]++;
  return 0;
}

/*
 * Set the mcp "mapping" parameter to enable; returns whatever
 * mx_mcpi.set_param() reports.
 */
int
mx_kraw_tick_change(mx_instance_state_t *is, int enable)
{
  int rc;

  rc = mx_mcpi.set_param(is->id, is->lanai.sram, "mapping", enable);
  return rc;
}

/*
 * Set up the raw interface of an instance.
 *
 * Allocate a big array of vpages, and then stuff them into rx and 
 * tx descriptors.  Also initializes the raw spinlock, sync object and
 * event lists, clears the raw state in the mcp, gives the mcp the DMA
 * addresses of the receive vpages, pre-allocates one event entry per
 * buffer and finally enables raw receives in the mcp.
 *
 * Returns 0 on success; EBUSY if raw->vpages is already set, ENXIO if
 * the mcp raw state could not be cleared, ENOMEM on allocation
 * failure, or the status of the failing helper.  Failures after the
 * vpage array has been allocated are cleaned up through
 * mx_kraw_destroy().
 */

int
mx_kraw_init(mx_instance_state_t *is)
{
  struct mx_raw_info *raw;
  struct mx_raw_vpage *v;
  struct raw_eventq_entry *entry;
  int i, status, len, num_tx_bufs, num_rx_bufs, num_rx_vpages;
  uint32_t mcp_rx_vpages;
  

  raw = &is->raw;

  /* a non-NULL vpage array means the raw interface is already set up */
  if (raw->vpages != NULL)
    return EBUSY;

  mx_spin_lock_init(&raw->spinlock, is, -1, "raw spinlock");  
  mx_sync_init(&raw->sync, is, -1, "raw sync");  
  STAILQ_INIT(&raw->free_events);
  STAILQ_INIT(&raw->pending_events);

  /* i only receives the command's result word, which is not used */
  status = mx_lanai_command(is, MX_MCP_CMD_CLEAR_RAW_STATE, 0, 0, 0,
                            (uint32_t *)&i, &is->raw.sync);
  if (status != 0) {
    MX_WARN(("mx%d: Could not clear raw state\n", is->id));
    mx_spin_lock_destroy(&raw->spinlock);
    mx_sync_destroy(&raw->sync);
    return ENXIO;
  }

  is->raw.rx_cnt = 0;
  is->raw.tx_req = 0;
  is->raw.tx_done = 0;

  /* a zero max index means "not configured": fall back to defaults.
     Both values are used as index masks by the send and receive paths */
  if (raw->max_tx_idx == 0)
    raw->max_tx_idx = is->kreqq_max_index;

  num_tx_bufs = raw->max_tx_idx + 1;

  if (raw->max_rx_idx == 0)
    raw->max_rx_idx = MX_RAW_NUM_RX_BUFS - 1;

  num_rx_bufs = raw->max_rx_idx + 1;

  /* NOTE(review): integer division; presumably the total buffer space
     is a whole number of vpages -- confirm */
  raw->num_vpages = ((num_tx_bufs + num_rx_bufs) * MX_RAW_BYTES) / MX_VPAGE_SIZE;

  raw->vpages = mx_kmalloc(raw->num_vpages * sizeof (raw->vpages[0]), MX_WAITOK|MX_MZERO);
  if (raw->vpages == NULL) {
    /* mx_kraw_destroy() only tears these down when vpages is set,
       so do it by hand here */
    mx_spin_lock_destroy(&raw->spinlock);
    mx_sync_destroy(&raw->sync);
    return ENOMEM;
  }
  /* back every vpage descriptor with a zeroed DMA page */
  for (i = 0, v = raw->vpages; i < raw->num_vpages; i++, v++) {
    status = mx_alloc_zeroed_dma_page(is, &v->alloc_addr, &v->buf, &v->pin);
    if (status) {
      mx_kraw_destroy(is);
      return status;
    }
  }

  /* ask the mcp where its receive vpage table and its host receive
     offset live; both values are later added to is->lanai.sram */
  status = mx_mcpi.get_param(is->id, is->lanai.sram, "raw_recv_vpages",
			      &mcp_rx_vpages);
  status |= mx_mcpi.get_param(is->id, is->lanai.sram, "raw_host_recv_offset", 
			      (unsigned int *)&is->raw.host_rx);
  if (status) {
    mx_kraw_destroy(is);
    return status;
  }

  /* the first num_rx_vpages pages are the receive pages: write their
     DMA addresses into the mcp's table */
  num_rx_vpages = (num_rx_bufs * MX_RAW_BYTES) / MX_VPAGE_SIZE;
  for (i = 0; i < num_rx_vpages; i++) {
    MX_PIO_WRITE(&((mcp_dma_addr_t *)(is->lanai.sram + mcp_rx_vpages))[i].low,
		 htonl(raw->vpages[i].pin.dma.low));
    MX_PIO_WRITE(&((mcp_dma_addr_t *)(is->lanai.sram + mcp_rx_vpages))[i].high,
		 htonl(raw->vpages[i].pin.dma.high));
    MX_STBAR();
  }

  raw->rx_bufs = mx_kmalloc(num_rx_bufs * sizeof (raw->rx_bufs[0]),
			    MX_WAITOK | MX_MZERO);
  if (raw->rx_bufs == NULL) {
    mx_kraw_destroy(is);
    return ENOMEM;
  }

  raw->tx_bufs = mx_kmalloc((num_tx_bufs) * sizeof (raw->tx_bufs[0]),
			    MX_WAITOK | MX_MZERO);

  if (raw->tx_bufs == NULL) {
    mx_kraw_destroy(is);
    return ENOMEM;
  }

  /* pre-allocate one event entry per buffer; the interrupt handlers
     take entries from free_events rather than allocating */
  for (i = 0; i < num_tx_bufs + num_rx_bufs; i++) {
    entry = mx_kmalloc(sizeof (*entry), MX_WAITOK | MX_MZERO);
    if (entry == NULL) {
      mx_kraw_destroy(is);
      return ENOMEM;
    }
     STAILQ_INSERT_TAIL(&raw->free_events, entry, entries);
  }


  /* carve MX_RAW_BYTES sized receive buffers out of the vpages.  len
     is the space left in the current page; v->buf itself is advanced,
     so it no longer points at the start of the page afterwards */
  v = raw->vpages;
  len = MX_VPAGE_SIZE;
  for (i = 0; i < num_rx_bufs; i++) {
    raw->rx_bufs[i].buf = v->buf;
    raw->rx_bufs[i].dma.low = htonl(v->pin.dma.low + (MX_VPAGE_SIZE - len));
    raw->rx_bufs[i].dma.high = htonl(v->pin.dma.high);
    v->buf += MX_RAW_BYTES;
    len -= MX_RAW_BYTES;
    if (len < MX_RAW_BYTES) {
      v++;
      len = MX_VPAGE_SIZE;
    }
  }


  /* the transmit buffers continue from wherever the receive loop
     stopped (v and len are deliberately not reset) */
  for (i = 0; i < num_tx_bufs; i++) {
    raw->tx_bufs[i].buf = v->buf;
    raw->tx_bufs[i].dma.low = htonl(v->pin.dma.low + (MX_VPAGE_SIZE - len));
    raw->tx_bufs[i].dma.high = htonl(v->pin.dma.high);
    v->buf += MX_RAW_BYTES;
    len -= MX_RAW_BYTES;
    if (len < MX_RAW_BYTES) {
      v++;
      len = MX_VPAGE_SIZE;
    }
  }


  /* everything is in place: let the mcp start delivering raw receives */
  status = mx_mcpi.set_param(is->id, is->lanai.sram, "raw_recv_enabled", 1);

  if (status) {
    MX_WARN(("Could not enable raw recvs in the mcp\n"));
    mx_kraw_destroy(is);
  }
  return status;
}

/*
 * Tear down the raw interface of an instance.
 *
 * Turns off the "mapping" and "raw_recv_enabled" mcp parameters,
 * issues a clear-raw-state command as a barrier, then releases the
 * buffer descriptor arrays, the DMA pages (together with the raw
 * spinlock and sync object) and every event entry still sitting on
 * the free or pending lists.  Pointers are reset to NULL once freed.
 */
void
mx_kraw_destroy(mx_instance_state_t *is)
{
  struct mx_raw_info *raw = &is->raw;
  struct raw_eventq_entry *ev;
  mx_sync_t barrier_sync;
  int n, rc;

  /* stop the mcp from producing further raw activity */
  (void)mx_kraw_tick_change(is, 0);
  mx_mcpi.set_param(is->id, is->lanai.sram, "raw_recv_enabled", 0);
  MX_STBAR();

  /* Issue a command to clear the raw state.  This is done mainly to
     act as a barrier, so we know we are done processing raw
     interrupts before we start to free resources used in the
     interrupt handler */

  mx_sync_init(&barrier_sync, is, 0, "kraw destroy tmp sync");
  rc = mx_lanai_command(is, MX_MCP_CMD_CLEAR_RAW_STATE, 0, 0, 0,
			(uint32_t *)&n, &barrier_sync);
  if (rc != 0 && !mx_is_dead(is)) {
    /* a failure is only worth reporting while the board is alive */
    MX_WARN(("mx%d: MX_MCP_CMD_CLEAR_RAW_STATE returns %d in mx_kraw_destroy\n",
	     is->id, rc));
  }
  mx_sync_destroy(&barrier_sync);

  /* buffer descriptor arrays */
  if (raw->tx_bufs != NULL) {
    mx_kfree(raw->tx_bufs);
    raw->tx_bufs = NULL;
  }
  if (raw->rx_bufs != NULL) {
    mx_kfree(raw->rx_bufs);
    raw->rx_bufs = NULL;
  }

  /* DMA pages; the lock and sync object share the vpage array's fate */
  if (raw->vpages != NULL) {
    for (n = 0; n < raw->num_vpages; n++) {
      struct mx_raw_vpage *page = &raw->vpages[n];

      if (page->alloc_addr == NULL)
	continue;
      mx_free_dma_page(is, &page->alloc_addr, &page->pin);
      page->alloc_addr = NULL;
    }
    mx_spin_lock_destroy(&raw->spinlock);
    mx_sync_destroy(&raw->sync);
    mx_kfree(raw->vpages);
    raw->vpages = NULL;
  }

  /* drain the free list, then the pending list */
  for (ev = STAILQ_FIRST(&raw->free_events); ev != NULL;
       ev = STAILQ_FIRST(&raw->free_events)) {
    STAILQ_REMOVE_HEAD(&raw->free_events, entries);
    mx_kfree(ev);
  }

  for (ev = STAILQ_FIRST(&raw->pending_events); ev != NULL;
       ev = STAILQ_FIRST(&raw->pending_events)) {
    STAILQ_REMOVE_HEAD(&raw->pending_events, entries);
    mx_kfree(ev);
  }
}

